import json

import pandas as pd
import re

# Maps each extracted root URL to the number of offer entries whose
# 'visit_url' resolves to it.
api_offernum = {}


# Matches text starting with "www." or "http:"/"https:" and lazily extends it
# up to the first ".com"/".org"/".net"/".io"; when none of those suffixes
# occurs, the match stops at the next whitespace or at the end of the string.
rx = re.compile(r'(?:www\.|https?:)\S*?(?:\.(?:com|org|net|io)|(?=\s)|$)', re.IGNORECASE)

# Common path prefix of the input files; "<n>.txt" is appended for n = 1..5.
filename = r"D:\Chrome_download\Correted-ProgrammableWeb-dataset-main\data\raw\accessibility\api_accessibility\api_version_accessbiliby-"

for i in range(5):
    # The context manager guarantees each input file is closed after parsing.
    # NOTE(review): the files are read with the platform default encoding, as
    # before — confirm whether an explicit encoding (e.g. UTF-8) is required.
    with open(filename + str(i + 1) + '.txt') as txtfile:
        jsondata = json.load(txtfile)

    for api in jsondata:
        # Entries may be null in the input; those carry no offers.
        if api is None:
            continue
        for offer_api in api['visit_status']:
            if not offer_api:
                continue
            visit_url = offer_api['visit_url']
            match = rx.search(visit_url)
            if match is None:
                # Report URLs the pattern cannot handle and skip them instead
                # of failing with IndexError on an empty match list.
                print(visit_url)
                continue
            # The first match in the URL is used as its root.
            root = match.group(0)
            api_offernum[root] = api_offernum.get(root, 0) + 1


# One row per root URL (as the index), with its count in a single column.
df = pd.DataFrame.from_dict(api_offernum, orient='index')
df.to_csv("api_offernum.csv")





